# coding=utf-8
from locale import *
import datetime
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup


def get_mac_address():
    """Return this machine's MAC address as 12 lowercase hex digits."""
    import uuid
    # uuid.getnode() yields the 48-bit hardware address as an int;
    # zero-pad to 32 hex digits and keep the low 48 bits (12 digits).
    hw_node = uuid.getnode()
    return ('%032x' % hw_node)[-12:]


def validate_mac_address():
    """Fetch the remote whitelist of authorised MAC addresses.

    Returns the response body decoded as text; the caller checks whether
    the local MAC address appears in it.
    """
    import urllib.request
    # Use a context manager so the HTTP response (and its socket) is
    # closed deterministically instead of being leaked to the GC.
    with urllib.request.urlopen('http://amazon-ceping.xunhuanle.com/publicwelcome/getallmacaddress') as resp:
        return resp.read().decode()

def set_driver():
    """Build a Chrome WebDriver configured for scraping.

    Certificate errors are ignored, the automation banner is suppressed,
    and image loading is disabled (pref value 2 = block) to speed up
    page loads. Expects ``chromedriver.exe`` next to the script.
    """
    opts = webdriver.ChromeOptions()
    for flag in ('--ignore-certificate-errors', '--test-type'):
        opts.add_argument(flag)
    opts.add_experimental_option('excludeSwitches', ['enable-automation'])
    opts.add_experimental_option(
        "prefs", {"profile.managed_default_content_settings.images": 2})
    return webdriver.Chrome(executable_path='./chromedriver.exe', options=opts)


def run_crawler(url, run_times, key_words, open_file):
    """Crawl Amazon search-result pages and record matching items.

    For each result item whose ``aria-label`` span text contains
    *key_words*, append a tab-separated line
    (link, ASIN, star rating, review count) to *open_file*.

    Parameters:
        url: search-results URL to start from.
        run_times: pages to visit before the browser is restarted —
            the function then recurses on the next-page URL with a
            fresh driver (deliberate endless cycle).
        key_words: substring that marks an item as a candidate.
        open_file: path of the output file (opened in append mode).
    """
    # Image loading is disabled in set_driver() to speed up crawling.
    driver = set_driver()
    driver.get(url)
    i = 1
    while i < run_times:
        i += 1
        soup = BeautifulSoup(driver.page_source, "html.parser")
        all_items = soup.select('div.s-result-item')
        # Open the output file once per page (the original reopened and
        # closed it for every item, leaking the handle on any exception).
        with open(open_file, 'a') as out:
            for item in all_items:
                # data-asin may be absent -> guard against None so the
                # tab-joined write below cannot raise TypeError.
                data_asin_txt = item.get("data-asin") or ""
                print(data_asin_txt)
                is_useful = 0
                star_text = "0"
                ping_lun_text = "0"  # review-count text
                link_text = ""
                # select() returns a (possibly empty) list, never None,
                # so no None check is needed here.
                for span_elm in item.select('span[aria-label]'):
                    if key_words in span_elm.get_text():
                        is_useful = 1
                if is_useful:
                    print("可备选")
                    link_elm = item.select_one('span > a.a-link-normal')
                    if link_elm is not None:
                        link_text = "https://www.amazon.com" + link_elm.get("href")
                    star_elm = item.select_one('span.a-icon-alt')
                    if star_elm is not None:
                        star_text = star_elm.get_text()
                    ping_lun_elm = item.select_one('span > a >span.a-size-base')
                    if ping_lun_elm is not None:
                        ping_lun_text = ping_lun_elm.get_text()
                    print(link_text)
                    print(star_text)
                    print(ping_lun_text)
                    out.write(link_text + '\t' + data_asin_txt + '\t'
                              + star_text + '\t' + ping_lun_text + '\n')
                else:
                    print("")
                print("=================================================")
        # Wait for the "next page" link, then follow it.
        WebDriverWait(driver, 120).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.a-last a')))
        nextPageUrl = driver.find_element_by_css_selector(".a-last a").get_attribute("href")
        print(nextPageUrl)

        if i < run_times:
            driver.get(nextPageUrl)
        else:
            # End of this cycle: restart with a fresh browser to keep
            # memory usage bounded over long runs.
            driver.quit()
            run_crawler(nextPageUrl, run_times, key_words, open_file)

# --- Disabled MAC-address licence check (kept for reference; note it
# --- would need `import sys` to actually run) ---
'''
mac_address = get_mac_address()
print("Your macaddress is below:")
print(mac_address)
validation_content = validate_mac_address()
if mac_address not in validation_content:
    print("Please submit your unicode '"+mac_address+"' to administrator!!!")
    sys.exit()
'''

# Timestamped output file. NOTE(review): the content written is
# tab-separated text, not a real Excel workbook, despite the .xls suffix.
file_name = "found_link_"+datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S')+".xls"

# Interactive configuration: start URL, the keyword that marks an item
# as a candidate, and how many pages per browser-restart cycle.
search_url = input("Input Search URL:")
print("Search url is:", search_url)

keyWords = input("Input Unavailable Keywords:")
keyWords = keyWords.strip()
print("Unavailable Keywords is:", keyWords)

runTimes = input("Run Times Every Cycle:")
runTimes = runTimes.strip()
print("Run Times Every Cycle is:", runTimes)

# 'English_US' is a Windows-style locale name — presumably this script
# targets Windows (it uses chromedriver.exe); would fail on Linux/macOS.
setlocale(LC_NUMERIC, 'English_US')

# Runs indefinitely: run_crawler recurses on itself every runTimes pages.
run_crawler(search_url, int(runTimes), keyWords, file_name)






